- 
                Notifications
    You must be signed in to change notification settings 
- Fork 15k
[X86] combineAdd - fold (add (sub (shl x, c), y), z) -> (sub (add (shl x, c), z), y) #142734
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
[X86] combineAdd - fold (add (sub (shl x, c), y), z) -> (sub (add (shl x, c), z), y): Attempt to keep adds/shifts closer together for LEA matching. Fixes llvm#55714.
| @llvm/pr-subscribers-backend-x86 Author: Simon Pilgrim (RKSimon) Changes: Attempt to keep adds/shifts closer together for LEA matching. Fixes #55714. Full diff: https://github.com/llvm/llvm-project/pull/142734.diff 5 Files Affected: 
 diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2399936ffd827..becd03e619d32 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -58010,6 +58010,23 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
   if (SDValue V = combineToHorizontalAddSub(N, DAG, Subtarget))
     return V;
 
+  // Canonicalize hidden LEA pattern:
+  // Fold (add (sub (shl x, c), y), z) -> (sub (add (shl x, c), z), y)
+  // iff c < 4
+  if (VT == MVT::i32 || VT == MVT::i64) {
+    SDValue Y, Z, Shift;
+    APInt Amt;
+    if (sd_match(
+            N, m_Add(m_OneUse(m_Sub(m_AllOf(m_Value(Shift),
+                                            m_Shl(m_Value(), m_ConstInt(Amt))),
+                                    m_Value(Y))),
+                     m_Value(Z))) &&
+        Amt.ult(4) && !isa<ConstantSDNode>(Z)) {
+      return DAG.getNode(ISD::SUB, DL, VT,
+                         DAG.getNode(ISD::ADD, DL, VT, Shift, Z), Y);
+    }
+  }
+
   // add(psadbw(X,0),psadbw(Y,0)) -> psadbw(add(X,Y),0)
   // iff X and Y won't overflow.
   if (Op0.getOpcode() == X86ISD::PSADBW && Op1.getOpcode() == X86ISD::PSADBW &&
diff --git a/llvm/test/CodeGen/X86/addr-mode-matcher-3.ll b/llvm/test/CodeGen/X86/addr-mode-matcher-3.ll
index 522b42e07c6e0..beea6d36fe874 100644
--- a/llvm/test/CodeGen/X86/addr-mode-matcher-3.ll
+++ b/llvm/test/CodeGen/X86/addr-mode-matcher-3.ll
@@ -104,16 +104,16 @@ define i32 @PR55714_i32(i32 %n, i32 %q) {
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    leal (,%ecx,8), %eax
-; X86-NEXT:    subl %ecx, %eax
 ; X86-NEXT:    addl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    subl %ecx, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: PR55714_i32:
 ; X64:       # %bb.0:
 ; X64-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-NEXT:    leal (,%rsi,8), %eax
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    leal (%rdi,%rsi,8), %eax
 ; X64-NEXT:    subl %esi, %eax
-; X64-NEXT:    addl %edi, %eax
 ; X64-NEXT:    retq
   %mul = mul i32 %q, 7
   %add = add i32 %mul, %n
@@ -123,21 +123,19 @@ define i32 @PR55714_i32(i32 %n, i32 %q) {
 define i64 @PR55714_i64(i64 %n, i64 %q) {
 ; X86-LABEL: PR55714_i64:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    leal (,%eax,8), %ecx
-; X86-NEXT:    subl %eax, %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl $7, %eax
 ; X86-NEXT:    mull {{[0-9]+}}(%esp)
-; X86-NEXT:    addl %ecx, %edx
+; X86-NEXT:    leal (%edx,%ecx,8), %edx
+; X86-NEXT:    subl %ecx, %edx
 ; X86-NEXT:    addl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    adcl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: PR55714_i64:
 ; X64:       # %bb.0:
-; X64-NEXT:    leaq (,%rsi,8), %rax
+; X64-NEXT:    leaq (%rdi,%rsi,8), %rax
 ; X64-NEXT:    subq %rsi, %rax
-; X64-NEXT:    addq %rdi, %rax
 ; X64-NEXT:    retq
   %mul = mul i64 %q, 7
   %add = add i64 %mul, %n
diff --git a/llvm/test/CodeGen/X86/apx/reloc-opt.ll b/llvm/test/CodeGen/X86/apx/reloc-opt.ll
index a5ab94b00d64b..ecc3d3297ceab 100644
--- a/llvm/test/CodeGen/X86/apx/reloc-opt.ll
+++ b/llvm/test/CodeGen/X86/apx/reloc-opt.ll
@@ -13,11 +13,9 @@
 
 
 ; CHECK-LABEL: test_regclass_not_updated_by_regalloc_1
-; APXREL: movq    (%rip), %r16
-; APXREL-NEXT: R_X86_64_CODE_4_GOTPCRELX gvar-0x4
-; NOAPXREL-NOT: R_X86_64_CODE_4_GOTPCRELX gvar-0x4
-; NOAPXREL: movq    (%rip), %rdi
-; NOAPXREL-NEXT: R_X86_64_REX_GOTPCRELX gvar-0x4
+; CHECK-NOT: R_X86_64_CODE_4_GOTPCRELX gvar-0x4
+; CHECK: movq    (%rip), %rdi
+; CHECK-NEXT: R_X86_64_REX_GOTPCRELX gvar-0x4
 
 @gvar = external global [20000 x i8]
 
diff --git a/llvm/test/CodeGen/X86/buildvec-widen-dotproduct.ll b/llvm/test/CodeGen/X86/buildvec-widen-dotproduct.ll
index 345014edd0e9d..5e94598565aa9 100644
--- a/llvm/test/CodeGen/X86/buildvec-widen-dotproduct.ll
+++ b/llvm/test/CodeGen/X86/buildvec-widen-dotproduct.ll
@@ -7,7 +7,6 @@
 define i32 @dot_ext_v8i8_v8i32(ptr %a, i64 %a_stride, ptr %b) nounwind {
 ; SSE2-LABEL: dot_ext_v8i8_v8i32:
 ; SSE2:       # %bb.0: # %entry
-; SSE2-NEXT:    pushq %r14
 ; SSE2-NEXT:    pushq %rbx
 ; SSE2-NEXT:    movzbl (%rdi), %eax
 ; SSE2-NEXT:    movzbl (%rdi,%rsi), %ecx
@@ -18,9 +17,9 @@ define i32 @dot_ext_v8i8_v8i32(ptr %a, i64 %a_stride, ptr %b) nounwind {
 ; SSE2-NEXT:    leaq (%rsi,%rsi,4), %rbx
 ; SSE2-NEXT:    movzbl (%rdi,%rbx), %ebx
 ; SSE2-NEXT:    movzbl (%rdi,%r9,2), %r9d
-; SSE2-NEXT:    leaq (,%rsi,8), %r14
-; SSE2-NEXT:    subq %rsi, %r14
-; SSE2-NEXT:    movzbl (%rdi,%r14), %esi
+; SSE2-NEXT:    leaq (%rdi,%rsi,8), %rdi
+; SSE2-NEXT:    subq %rsi, %rdi
+; SSE2-NEXT:    movzbl (%rdi), %esi
 ; SSE2-NEXT:    shll $16, %ecx
 ; SSE2-NEXT:    orl %eax, %ecx
 ; SSE2-NEXT:    movd %ecx, %xmm0
@@ -38,7 +37,6 @@ define i32 @dot_ext_v8i8_v8i32(ptr %a, i64 %a_stride, ptr %b) nounwind {
 ; SSE2-NEXT:    paddd %xmm0, %xmm1
 ; SSE2-NEXT:    movd %xmm1, %eax
 ; SSE2-NEXT:    popq %rbx
-; SSE2-NEXT:    popq %r14
 ; SSE2-NEXT:    retq
 ;
 ; SSE4-LABEL: dot_ext_v8i8_v8i32:
@@ -46,7 +44,7 @@ define i32 @dot_ext_v8i8_v8i32(ptr %a, i64 %a_stride, ptr %b) nounwind {
 ; SSE4-NEXT:    movzbl (%rdi), %eax
 ; SSE4-NEXT:    leaq (%rsi,%rsi,4), %rcx
 ; SSE4-NEXT:    leaq (%rsi,%rsi,2), %r8
-; SSE4-NEXT:    leaq (,%rsi,8), %r9
+; SSE4-NEXT:    leaq (%rdi,%rsi,8), %r9
 ; SSE4-NEXT:    subq %rsi, %r9
 ; SSE4-NEXT:    movd %eax, %xmm0
 ; SSE4-NEXT:    pinsrb $2, (%rdi,%rsi), %xmm0
@@ -55,7 +53,7 @@ define i32 @dot_ext_v8i8_v8i32(ptr %a, i64 %a_stride, ptr %b) nounwind {
 ; SSE4-NEXT:    pinsrb $8, (%rdi,%rsi,4), %xmm0
 ; SSE4-NEXT:    pinsrb $10, (%rdi,%rcx), %xmm0
 ; SSE4-NEXT:    pinsrb $12, (%rdi,%r8,2), %xmm0
-; SSE4-NEXT:    pinsrb $14, (%rdi,%r9), %xmm0
+; SSE4-NEXT:    pinsrb $14, (%r9), %xmm0
 ; SSE4-NEXT:    movdqu (%rdx), %xmm1
 ; SSE4-NEXT:    pmaddwd %xmm0, %xmm1
 ; SSE4-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
@@ -70,7 +68,7 @@ define i32 @dot_ext_v8i8_v8i32(ptr %a, i64 %a_stride, ptr %b) nounwind {
 ; AVX-NEXT:    movzbl (%rdi), %eax
 ; AVX-NEXT:    leaq (%rsi,%rsi,2), %rcx
 ; AVX-NEXT:    leaq (%rsi,%rsi,4), %r8
-; AVX-NEXT:    leaq (,%rsi,8), %r9
+; AVX-NEXT:    leaq (%rdi,%rsi,8), %r9
 ; AVX-NEXT:    subq %rsi, %r9
 ; AVX-NEXT:    vmovd %eax, %xmm0
 ; AVX-NEXT:    vpinsrb $2, (%rdi,%rsi), %xmm0, %xmm0
@@ -79,7 +77,7 @@ define i32 @dot_ext_v8i8_v8i32(ptr %a, i64 %a_stride, ptr %b) nounwind {
 ; AVX-NEXT:    vpinsrb $8, (%rdi,%rsi,4), %xmm0, %xmm0
 ; AVX-NEXT:    vpinsrb $10, (%rdi,%r8), %xmm0, %xmm0
 ; AVX-NEXT:    vpinsrb $12, (%rdi,%rcx,2), %xmm0, %xmm0
-; AVX-NEXT:    vpinsrb $14, (%rdi,%r9), %xmm0, %xmm0
+; AVX-NEXT:    vpinsrb $14, (%r9), %xmm0, %xmm0
 ; AVX-NEXT:    vpmaddwd (%rdx), %xmm0, %xmm0
 ; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
 ; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/mul-constant-i64.ll b/llvm/test/CodeGen/X86/mul-constant-i64.ll
index 03dd5351c78ac..40d591f8d1be8 100644
--- a/llvm/test/CodeGen/X86/mul-constant-i64.ll
+++ b/llvm/test/CodeGen/X86/mul-constant-i64.ll
@@ -166,12 +166,11 @@ define i64 @test_mul_by_6(i64 %x) {
 define i64 @test_mul_by_7(i64 %x) {
 ; X86-LABEL: test_mul_by_7:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    leal (,%eax,8), %ecx
-; X86-NEXT:    subl %eax, %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl $7, %eax
 ; X86-NEXT:    mull {{[0-9]+}}(%esp)
-; X86-NEXT:    addl %ecx, %edx
+; X86-NEXT:    leal (%edx,%ecx,8), %edx
+; X86-NEXT:    subl %ecx, %edx
 ; X86-NEXT:    retl
 ;
 ; X86-NOOPT-LABEL: test_mul_by_7:
@@ -733,16 +732,17 @@ define i64 @test_mul_by_22(i64 %x) {
   ret i64 %mul
 }
 
-define i64 @test_mul_by_23(i64 %x) {
+define i64 @test_mul_by_23(i64 %x) nounwind {
 ; X86-LABEL: test_mul_by_23:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    leal (%eax,%eax,2), %ecx
-; X86-NEXT:    shll $3, %ecx
-; X86-NEXT:    subl %eax, %ecx
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    leal (%ecx,%ecx,2), %esi
 ; X86-NEXT:    movl $23, %eax
 ; X86-NEXT:    mull {{[0-9]+}}(%esp)
-; X86-NEXT:    addl %ecx, %edx
+; X86-NEXT:    leal (%edx,%esi,8), %edx
+; X86-NEXT:    subl %ecx, %edx
+; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
 ;
 ; X86-NOOPT-LABEL: test_mul_by_23:
 | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
| Hey there! The Fuchsia team is seeing dramatic slowdowns on our Mac AArch64 toolchain builders: build times have gone from 3.5 hours to hitting the 5-hour timeout. This is one of two plausible commits in the blamelist. It only seems to appear in our LTO 2-stage prod builds. We do build builtins for X86 and other platforms. Do you think something in this change could cause this kind of slowdown? We haven't been able to directly determine the culprit beyond a rough range. | 
| Test case? I'm not going to rule it out but I doubt it. | 
| 
 Is it a slowdown or is the compiler hung? | 
| 
 It's difficult to tell at the moment; when our builders time out we lose access to the ninja traces. I'm working on getting them back; I'll post back when I have more information. | 
| 
 It's definitely a compiler hang; it occurs during the first CMake ABI check against the just-built clang. It only occurs in LTO mode, which I didn't realize. I'm running through a fresh round of trials with reverts for the 4 changes in the blamelist (this is still one of them); I'll post back with either an exoneration or confirmation that this is causing the hang. | 
| I was able to reproduce the issue, and reverting this PR didn't resolve it. Sorry for the false alarm. | 
Attempt to keep adds/shifts closer together for LEA matching
Fixes #55714